home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Celestin Apprentice 5
/
Apprentice-Release5.iso
/
Source Code
/
Libraries
/
Advanced I⁄O v2.3
/
Advanced i⁄o
/
histogram.cc
< prev
next >
Wrap
Text File
|
1995-05-29
|
4KB
|
135 lines
// This may look like C code, but it is really -*- C++ -*-
/*
************************************************************************
*
* Computing the Histogram
* and related operations
* $Id$
*
************************************************************************
*/
#ifdef __GNUC__
#pragma implementation
#endif
#include "histogram.h"
#include "myenv.h"
#include "std.h"
#include <math.h>
#ifndef M_LOG2E
#define M_LOG2E 1.4426950408889634074
#endif
// Constructor: constructs a clean histogram
// ready to accumulate statistics for symbols
// within [lwb,upb]
Histogram::Histogram(const Histogram::Symbol lwb, const Histogram::Symbol upb)
: symbol_lwb(lwb), symbol_upb(upb),
no_potential_symbols(upb-lwb+1)
{
assure(no_potential_symbols > 1,
"Bracketing interval for input symbols should include at least "
"two symbols");
counts = new unsigned int[no_potential_symbols];
memset(counts,0,sizeof(counts[0])*no_potential_symbols);
}
// Destructor
Histogram::~Histogram(void)
{
assert( counts != 0 );
delete [] counts;
}
// Count a symbol
void Histogram::put(const Symbol symbol)
{
if( symbol < symbol_lwb || symbol > symbol_upb )
_error("Symbol %d is out of interval [%d,%d]",
symbol, symbol_lwb, symbol_upb);
counts[symbol-symbol_lwb] += 1;
}
// Count a symbol coercing it to the expected
// range if necessary
void Histogram::put_coerce(const Symbol symbol)
{
counts[ ( symbol < symbol_lwb ? symbol_lwb :
symbol > symbol_upb ? symbol_upb :
symbol ) - symbol_lwb ] += 1;
}
// Return the count for a symbol
int Histogram::get(const Symbol symbol) const
{
if( symbol < symbol_lwb || symbol > symbol_upb )
_error("Symbol %d is out of interval [%d,%d]",
symbol, symbol_lwb, symbol_upb);
return counts[symbol-symbol_lwb];
}
// Give the no. of distinct symbols, ie
// symbols with non-zero count
int Histogram::no_distinct_symbols(void) const
{
register int no = 0;
register unsigned int *cp;
for(cp=&counts[0]; cp<&counts[no_potential_symbols]; )
if( *cp++ != 0 )
no++;
return no;
}
// Print the histogram
void Histogram::print(const char * title) const
{
message("\nHistogram '%s'\n"
"\nValue Quantity\n",title);
register int i;
register int no_symbols = 0;
register int total = 0;
register unsigned int *cp;
for(i=symbol_lwb, cp=&counts[0]; i <= symbol_upb; i++,cp++)
if( *cp != 0 )
{
total += *cp;
no_symbols++;
message("%2d %5d\n",i,*cp);
}
assert( cp == &counts[no_potential_symbols] );
message("\n****** Total no. of symbols %d, total count %d\n\n",
no_symbols,total);
}
// Estimate a zero-order entropy of a source
// represented by the histogram
// Using Shannon formula
// log2(f) - SUM[ fi*log2(fi) ]/f
// where fi is a count for symbol i, f is a total
// count
// The formula tells how many bits are necessary
// to represent a single *average* symbol emitted
// by the (stationary) source
static inline double log2(const double x) { return log(x) * M_LOG2E; }
double Histogram::estimate_entropy(void) const
{
register long total_count = 0;
register unsigned int *cp;
for(cp=&counts[0]; cp<&counts[no_potential_symbols]; cp++)
if( *cp != 0 )
total_count += *cp;
assert( total_count > 0 );
double accum_log = 0;
for(cp=&counts[0]; cp<&counts[no_potential_symbols]; cp++)
if( *cp != 0 )
accum_log += *cp * log2( *cp );
return log2(total_count) - accum_log/total_count;
}